/* Copyright (C) 2014 InfiniDB, Inc.
   Copyright (C) 2016 MariaDB Corporation

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License
   as published by the Free Software Foundation; version 2 of
   the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.


   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
   MA 02110-1301, USA. */

/*****************************************************************************************
* $Id: main.cpp 2203 2013-07-08 16:50:51Z bpaul $
*
*****************************************************************************************/


#include <clocale>

#include <boost/filesystem.hpp>

#include "columnstoreversion.h"
#include "processmanager.h"
#include "installdir.h"

#include "utils_utf8.h"

#include "crashtrace.h"

using namespace std;
using namespace logging;
using namespace messageqcpp;
using namespace processmanager;
using namespace oam;
using namespace alarmmanager;
using namespace threadpool;
//using namespace procheartbeat;
using namespace config;

// Process-wide settings. None of them is 'static', so they have external
// linkage and may be referenced from other translation units; most are
// filled in by main() during start-up.

// true while this node runs as standby; main() sets it when
// gOAMParentModuleFlag is false, and the message threads wait while it is true
bool runStandby = false;
// NOTE(review): not referenced in the visible part of this file
bool runCold = false;
// value of the "SystemName" system config entry (default kept if the read throws)
string systemName = "system";
// name of the UP IPv4 interface whose address equals the local module's IP address
string iface_name;
// value of the "Cloud" system config entry
string cloud;
// true when cloud is "amazon-ec2" or "amazon-vpc"
bool amazon = false;
// the next three are only read from the system config when 'amazon' is set
string PMInstanceType;
string UMInstanceType;
string AmazonPMFailover = "y";
// value of the "DataRedundancyConfig" system config entry, "n" if the read throws
string DataRedundancyConfig = "n";
// false when getuid() reports a non-zero user id
bool rootUser = true;
// taken from the USER environment variable when that is set and non-empty
string USER = "root";
// true when the "DBRootStorageType" system config entry is "hdfs"
bool HDFS = false;
// HostName of the first host entry of the local module's configuration
string localHostName;
// value of the "PMwithUM" system config entry, "n" if the read throws
string PMwithUM = "n";
// value of the "MySQLRep" system config entry, "n" if the read throws
string MySQLRep = "n";
// result of startup::StartUp::tmpDir()
string tmpLogDir;


// Interval, in seconds, for pushing the ACTIVE_ALARMS_FILE to all nodes.
// NOTE(review): the code that consumes this constant is outside the visible
// part of this file -- presumably hdfsActiveAlarmsPushingThread; confirm.
const int ACTIVE_ALARMS_PUSHING_INTERVAL = 10;

// Map keyed by string with an int value.
// NOTE(review): presumably module name -> per-module counter/state; neither the
// type nor the instance is used in the visible part of this file -- confirm.
typedef   map<string, int>	moduleList;
moduleList	moduleInfoList;

// Shared state whose definitions are not in the visible part of this file.
// Only startsystemthreadStop, startFailOver and gOAMParentModuleFlag are used
// in the code shown here; the notes on the others are unverified.
extern HeartBeatProcList hbproclist;
extern pthread_mutex_t THREAD_LOCK;
// polled by startMgrProcessThread(); when set, that thread marks the system
// MAN_OFFLINE and exits
extern bool startsystemthreadStop;
extern string gdownActiveOAMModule;
extern int startsystemthreadStatus;
extern vector<string> downModuleList;
// set to true by sigUser1Handler() when SIGUSR1 arrives
extern bool startFailOver;
// read by main(): true -> run active, false -> run standby
extern bool gOAMParentModuleFlag;

// Forward declarations of the file-local thread entry points and the SIGUSR1
// handler defined further down.
// NOTE(review): messageThread/alarmMessageThread take a Configuration by value
// but are started through pthread_create with a function-pointer cast and
// '&config' as the argument; this relies on how the ABI passes class-type
// arguments -- confirm before changing either side.
static void messageThread(Configuration config);
static void alarmMessageThread(Configuration config);
static void sigUser1Handler(int sig);
static void startMgrProcessThread();
static void hdfsActiveAlarmsPushingThread();
// disabled declarations, kept for reference:
//static void pingDeviceThread();
//static void heartbeatProcessThread();
//static void heartbeatMsgThread();

/*****************************************************************************************
* @brief	main
*
* purpose:	request launching of Mgr controlled processes and wait for incoming messages
*
*****************************************************************************************/
int main(int argc, char** argv)
{
#ifndef _MSC_VER
    setuid(0); // set effective ID to root; ignore return status
#endif
    // get and set locale language
    string systemLang = "C";

    setlocale(LC_ALL, systemLang.c_str());

    // This is unset due to the way we start it
    program_invocation_short_name = const_cast<char*>("ProcMgr");

    struct sigaction ign;
    memset(&ign, 0, sizeof(ign));
    ign.sa_handler = fatalHandler;
    sigaction(SIGSEGV, &ign, 0);
    sigaction(SIGABRT, &ign, 0);
    sigaction(SIGFPE, &ign, 0);

    Oam oam;

    //check if root-user
    int user;
    user = getuid();

    if (user != 0)
        rootUser = false;

    char* p = getenv("USER");

    if (p && *p)
        USER = p;

    ProcessLog log;
    Configuration config;
    ProcessManager processManager(config, log);
    ALARMManager aManager;

    log.writeLog(__LINE__, " ");
    log.writeLog(__LINE__, "**********Process Manager Started**********");

    //Ignore SIGPIPE signals
    signal(SIGPIPE, SIG_IGN);

    //Ignore SIGHUP signals
    signal(SIGHUP, SIG_IGN);

    //create SIGUSR1 handler to get configuration updates
    signal(SIGUSR1, sigUser1Handler);

    // Get System Name
    try
    {
        oam.getSystemConfig("SystemName", systemName);
    }
    catch (...)
    {}

    //get cloud setting
    try
    {
        oam.getSystemConfig( "Cloud", cloud);
    }
    catch (...) {}

    //get amazon parameters
    if ( cloud == "amazon-ec2" || cloud == "amazon-vpc" )
    {
        oam.getSystemConfig("PMInstanceType", PMInstanceType);
        oam.getSystemConfig("UMInstanceType", UMInstanceType);
        oam.getSystemConfig("AmazonPMFailover", AmazonPMFailover);

        amazon = true;
    }

    //get gluster config
    try
    {
        oam.getSystemConfig( "DataRedundancyConfig", DataRedundancyConfig);
    }
    catch (...)
    {
        DataRedundancyConfig = "n";
    }

    //hdfs / hadoop config
    string DBRootStorageType;

    try
    {
        oam.getSystemConfig( "DBRootStorageType", DBRootStorageType);
    }
    catch (...) {}

    if ( DBRootStorageType == "hdfs" )
        HDFS = true;

    log.writeLog(__LINE__, "Main: DBRootStorageType = " + DBRootStorageType, LOG_TYPE_DEBUG);

    //PMwithUM config
    try
    {
        oam.getSystemConfig( "PMwithUM", PMwithUM);
    }
    catch (...)
    {
        PMwithUM = "n";
    }

    try
    {
        oam.getSystemConfig("MySQLRep", MySQLRep);
    }
    catch (...)
    {
        MySQLRep = "n";
    }

    // get system uptime and alarm if this is a restart after module outage
    if ( gOAMParentModuleFlag )
    {
        log.writeLog(__LINE__, "Running Active");
        log.writeLog(__LINE__, "Running Active", LOG_TYPE_DEBUG);
    }
    else
    {
        log.writeLog(__LINE__, "Running Standby");
        log.writeLog(__LINE__, "Running Standby", LOG_TYPE_DEBUG);
        runStandby = true;
    }

    //get local module main IP address
    ModuleConfig moduleconfig;
    oam.getSystemConfig(config.moduleName(), moduleconfig);
    HostConfigList::iterator pt1 = moduleconfig.hostConfigList.begin();
    string localIPaddr = (*pt1).IPAddr;
    localHostName = (*pt1).HostName;

    struct ifaddrs* addrs, *iap;
    struct sockaddr_in* sa;
    char buf[32];

    getifaddrs(&addrs);

    for (iap = addrs; iap != NULL; iap = iap->ifa_next)
    {

        if (iap->ifa_addr && (iap->ifa_flags & IFF_UP) && iap->ifa_addr->sa_family == AF_INET)
        {
            sa = (struct sockaddr_in*)(iap->ifa_addr);
            inet_ntop(iap->ifa_addr->sa_family, (void*) & (sa->sin_addr), buf, sizeof(buf));

            if (!strcmp(localIPaddr.c_str(), buf))
            {
                iface_name = iap->ifa_name;
                break;
            }
        }
    }

    freeifaddrs(addrs);
    log.writeLog(__LINE__, "Main Ethernet Port = " + iface_name, LOG_TYPE_DEBUG);

    //get tmp log directory
    tmpLogDir = startup::StartUp::tmpDir();

    //
    //start a thread to ping all system modules
    //
    if (runStandby)
    {
        //running standby after startup
        try
        {
            oam.processInitComplete("ProcessManager", oam::STANDBY);
            log.writeLog(__LINE__, "processInitComplete Successfully Called", LOG_TYPE_DEBUG);
        }
        catch (exception& ex)
        {
            string error = ex.what();
            log.writeLog(__LINE__, "EXCEPTION ERROR on processInitComplete: " + error, LOG_TYPE_ERROR);
        }
        catch (...)
        {
            log.writeLog(__LINE__, "EXCEPTION ERROR on processInitComplete: Caught unknown exception!", LOG_TYPE_ERROR);
        }

        // create message thread
        pthread_t MessageThread;
        int ret = pthread_create (&MessageThread, NULL, (void* (*)(void*)) &messageThread, &config);

        if ( ret != 0 )
            log.writeLog(__LINE__, "pthread_create failed, return code = " + oam.itoa(ret), LOG_TYPE_ERROR);

        // create alarm message thread
        pthread_t AlarmMessageThread;
        ret = pthread_create (&AlarmMessageThread, NULL, (void* (*)(void*)) &alarmMessageThread, &config);

        if ( ret != 0 )
            log.writeLog(__LINE__, "pthread_create failed, return code = " + oam.itoa(ret), LOG_TYPE_ERROR);

        //monitor OAM Parent Module for failover
        while (true)
        {
            if ( processManager.OAMParentModuleChange() == oam::API_SUCCESS )
                break;

            log.writeLog(__LINE__, "OAMParentModuleChange failure", LOG_TYPE_WARNING);
            // GO TRY AGAIN
        }

        pthread_t srvThread;
        int status = pthread_create (&srvThread, NULL, (void* (*)(void*)) &pingDeviceThread, NULL);

        if ( status != 0 )
            log.writeLog(__LINE__, "pingDeviceThread: pthread_create failed, return status = " + oam.itoa(status), LOG_TYPE_ERROR);
    }
    else
    {
        //running active after startup
        //Update DBRM section of Columnstore.xml
        processManager.updateWorkerNodeconfig();
//		processManager.distributeConfigFile("system");

        pthread_t srvThread;
        int status = pthread_create (&srvThread, NULL, (void* (*)(void*)) &pingDeviceThread, NULL);

        if ( status != 0 )
            log.writeLog(__LINE__, "pingDeviceThread: pthread_create failed, return status = " + oam.itoa(status), LOG_TYPE_ERROR);

        // if HDFS, create a thread to push an image of activeAlarms to HDFS filesystem
        if (HDFS)
        {
            pthread_t hdfsAlarmThread;
            int status = pthread_create(&hdfsAlarmThread, NULL, (void* (*)(void*)) &hdfsActiveAlarmsPushingThread, NULL);

            if ( status != 0 )
                log.writeLog(__LINE__, "hdfsActiveAlarmsPushingThread pthread_create failed, return code = " + oam.itoa(status), LOG_TYPE_ERROR);
        }

        sleep(5);

        SystemStatus systemstatus;

        try
        {
            oam.getSystemStatus(systemstatus);
        }
        catch (exception& ex)
        {
//			string error = ex.what();
//			log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemStatus: " + error, LOG_TYPE_ERROR);
        }
        catch (...)
        {
//			log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemStatus: Caught unknown exception!", LOG_TYPE_ERROR);
        }

        if (systemstatus.SystemOpState != oam::MAN_OFFLINE &&
                systemstatus.SystemOpState != oam::ACTIVE)
        {
            pthread_t mgrProcThread;
            int status = pthread_create (&mgrProcThread, NULL, (void* (*)(void*)) &startMgrProcessThread, NULL);

            if ( status != 0 )
                log.writeLog(__LINE__, "startMgrProcessThread: pthread_create failed, return status = " + oam.itoa(status), LOG_TYPE_ERROR);
        }

        try
        {
            oam.processInitComplete("ProcessManager");
            log.writeLog(__LINE__, "processInitComplete Successfully Called", LOG_TYPE_DEBUG);
        }
        catch (exception& ex)
        {
            string error = ex.what();
            log.writeLog(__LINE__, "EXCEPTION ERROR on processInitComplete: " + error, LOG_TYPE_ERROR);
        }
        catch (...)
        {
            log.writeLog(__LINE__, "EXCEPTION ERROR on processInitComplete: Caught unknown exception!", LOG_TYPE_ERROR);
        }

        //make sure ProcMgr IP Address is configured correctly
        try
        {
            Config* sysConfig = Config::makeConfig();

            // get Standby IP address
            ModuleConfig moduleconfig;
            oam.getSystemConfig(config.moduleName(), moduleconfig);
            HostConfigList::iterator pt1 = moduleconfig.hostConfigList.begin();
            string IPaddr = (*pt1).IPAddr;

            sysConfig->setConfig("ProcMgr", "IPAddr", IPaddr);
            sysConfig->setConfig("ProcMgr_Alarm", "IPAddr", IPaddr);

            log.writeLog(__LINE__, "set ProcMgr IPaddr to " + IPaddr, LOG_TYPE_DEBUG);

            //update Calpont Config table
            try
            {
                sysConfig->write();
            }
            catch (...)
            {
                log.writeLog(__LINE__, "ERROR: sysConfig->write", LOG_TYPE_ERROR);
            }
        }
        catch (...)
        {
            log.writeLog(__LINE__, "ERROR: makeConfig failed", LOG_TYPE_ERROR);
        }

        try
        {
            oam.distributeConfigFile();
        }
        catch (...)
        {}

        // create message thread
        pthread_t MessageThread;
        int ret = pthread_create (&MessageThread, NULL, (void* (*)(void*)) &messageThread, &config);

        if ( ret != 0 )
            log.writeLog(__LINE__, "pthread_create failed, return code = " + oam.itoa(ret), LOG_TYPE_ERROR);

        // create alarm message thread
        pthread_t AlarmMessageThread;
        ret = pthread_create (&AlarmMessageThread, NULL, (void* (*)(void*)) &alarmMessageThread, &config);

        if ( ret != 0 )
            log.writeLog(__LINE__, "pthread_create failed, return code = " + oam.itoa(ret), LOG_TYPE_ERROR);
    }

    //
    //start a thread to process heartbeat checks
    //
//	pthread_t heartThread;
//	pthread_create (&heartThread, NULL, (void*(*)(void*)) &heartbeatProcessThread, NULL);

    //
    //start a thread to read heartbeat messages
    //
//	pthread_t heartMsgThread;
//	pthread_create (&heartMsgThread, NULL, (void*(*)(void*)) &heartbeatMsgThread, NULL);

    // suspend forever
    while (true)
    {
        sleep(1000);
    }
}

/******************************************************************************************
* @brief	messageThread
*
* purpose:	Read incoming messages. Waits while this node is standby, frees the
*		"ProcMgr" port, then accepts connections on the "ProcMgr" message
*		queue and hands each one to a new processMSG thread. Never returns.
*
* config is used to construct the thread-local ProcessManager.
*
******************************************************************************************/
static void messageThread(Configuration config)
{
    ProcessLog log;
    ProcessManager processManager(config, log);
    Oam oam;

    //check for running active, then launch
    while (runStandby)
        sleep (1);

    log.writeLog(__LINE__, "Message Thread started ..", LOG_TYPE_DEBUG);

    //read and cleanup port before trying to use (best effort, failures ignored)
    try
    {
        Config* sysConfig = Config::makeConfig();
        string port = sysConfig->getConfig("ProcMgr", "Port");
        string cmd = "fuser -k " + port + "/tcp >/dev/null 2>&1";

        system(cmd.c_str());
    }
    catch (...)
    {
    }

    //
    //waiting for request
    //
    IOSocket fIos;

    for (;;)
    {
        try
        {
            MessageQueueServer procmgr("ProcMgr");

            for (;;)
            {
                try
                {
                    fIos = procmgr.accept();

                    // NOTE(review): &fIos points at a local that the next accept()
                    // overwrites; presumably processMSG copies the socket before
                    // that can happen -- confirm.
                    pthread_t messagethread;
                    int status = pthread_create (&messagethread, NULL, (void* (*)(void*)) &processMSG, &fIos);

                    if ( status != 0 )
                        log.writeLog(__LINE__, "messagethread: pthread_create failed, return status = " + oam.itoa(status), LOG_TYPE_ERROR);
                }
                catch (exception& ex)
                {
                    // report the failure, as alarmMessageThread does, instead of
                    // dropping it silently; then keep accepting
                    string error = ex.what();
                    log.writeLog(__LINE__, "EXCEPTION ERROR on accept for ProcMgr:" + error, LOG_TYPE_ERROR);
                }
                catch (...)
                {
                    log.writeLog(__LINE__, "EXCEPTION ERROR on accept for ProcMgr: Caught unknown exception!", LOG_TYPE_ERROR);
                }

            }
        }
        catch (exception& ex)
        {
            string error = ex.what();
            log.writeLog(__LINE__, "EXCEPTION ERROR on MessageQueueServer for ProcMgr:" + error, LOG_TYPE_ERROR);

            // takes 2 - 4 minutes to free sockets, sleep and retry
            sleep(60);
        }
        catch (...)
        {
            log.writeLog(__LINE__, "EXCEPTION ERROR on MessageQueueServer for ProcMgr: Caught unknown exception!", LOG_TYPE_ERROR);

            // takes 2 - 4 minutes to free sockets, sleep and retry
            sleep(60);
        }
    }
}

/******************************************************************************************
* @brief	alarmMessageThread
*
* purpose:	Read incoming alarm messages. Waits while this node is standby, frees
*		the "ProcMgr_Alarm" port, then serves the "ProcMgr_Alarm" message
*		queue: every non-empty message is decoded into an Alarm and passed
*		to ALARMManager::processAlarmReport. Never returns.
*
******************************************************************************************/
static void alarmMessageThread(Configuration config)
{
    ProcessLog log;
    ProcessManager processManager(config, log);
    Oam oam;

    ByteStream msg;

    //stay idle until this node is running active
    while (runStandby)
        sleep (1);

    log.writeLog(__LINE__, "Alarm Message Thread started ..", LOG_TYPE_DEBUG);

    //free the alarm port before binding to it; any failure is ignored
    try
    {
        Config* cfg = Config::makeConfig();
        string alarmPort = cfg->getConfig("ProcMgr_Alarm", "Port");
        string killCmd = "fuser -k " + alarmPort + "/tcp >/dev/null 2>&1";

        system(killCmd.c_str());
    }
    catch (...)
    {
    }

    //
    //serve alarm requests
    //
    IOSocket fIos;

    while (true)
    {
        try
        {
            MessageQueueServer alarmServer("ProcMgr_Alarm");

            while (true)
            {
                try
                {
                    fIos = alarmServer.accept();

                    try
                    {
                        msg = fIos.read();

                        if (msg.length() <= 0)
                        {
                            // empty read: drop the connection
                            fIos.close();
                        }
                        else
                        {
                            // wire order: id, component, state, module, process, pid, tid
                            ByteStream::byte id;
                            std::string component;
                            ByteStream::byte alarmState;
                            std::string module;
                            std::string process;
                            ByteStream::byte processId;
                            ByteStream::byte threadId;

                            msg >> id;
                            msg >> component;
                            msg >> alarmState;
                            msg >> module;
                            msg >> process;
                            msg >> processId;
                            msg >> threadId;

                            Alarm report;

                            report.setAlarmID (id);
                            report.setComponentID (component);
                            report.setState (alarmState);
                            report.setSname (module);
                            report.setPname (process);
                            report.setPid (processId);
                            report.setTid (threadId);

                            ALARMManager alarmMgr;
                            alarmMgr.processAlarmReport(report);

                            fIos.close();
                        }
                    }
                    catch (exception& readEx)
                    {
                        log.writeLog(__LINE__, string("EXCEPTION ERROR on read for ProcMgr_Alarm:") + readEx.what(), LOG_TYPE_ERROR);
                        fIos.close();
                    }
                    catch (...)
                    {
                        log.writeLog(__LINE__, "EXCEPTION ERROR on read for ProcMgr_Alarm: Caught unknown exception!", LOG_TYPE_ERROR);
                        fIos.close();
                    }
                }
                catch (exception& acceptEx)
                {
                    log.writeLog(__LINE__, string("EXCEPTION ERROR on accept for ProcMgr_Alarm:") + acceptEx.what(), LOG_TYPE_ERROR);
                }
                catch (...)
                {
                    log.writeLog(__LINE__, "EXCEPTION ERROR on accept for ProcMgr_Alarm: Caught unknown exception!", LOG_TYPE_ERROR);
                }
            }
        }
        catch (exception& serverEx)
        {
            log.writeLog(__LINE__, string("EXCEPTION ERROR on MessageQueueServer for ProcMgr_Alarm:") + serverEx.what(), LOG_TYPE_ERROR);

            // short pause, then rebuild the server
            sleep(1);
        }
        catch (...)
        {
            log.writeLog(__LINE__, "EXCEPTION ERROR on MessageQueueServer for ProcMgr_Alarm: Caught unknown exception!", LOG_TYPE_ERROR);

            sleep(1);
        }
    }
}

/******************************************************************************************
* @brief	sigUser1Handler
*
* purpose:	Handle the SIGUSR1 signal by raising the startFailOver flag.
*
* The handler does nothing else on purpose: constructing the logging and manager
* objects or writing a log line here would allocate memory and perform I/O, which
* is not async-signal-safe and can deadlock or corrupt state if the signal
* interrupts such a call in the main program. Code that consumes startFailOver
* should do any logging.
*
* NOTE(review): startFailOver is declared as a plain 'extern bool'; consider making
* its definition 'volatile sig_atomic_t' (or an atomic) where it is defined.
*
******************************************************************************************/
static void sigUser1Handler(int sig)
{
    (void) sig; // signal number is not needed

    startFailOver = true;
}

/*****************************************************************************************
* @brief	Start Mgr Process by module Thread
*
* purpose:	Send Messages to Module Process Monitors to start Processes
*
*****************************************************************************************/
static void startMgrProcessThread()
{
    ProcessLog log;
    Configuration config;
    ProcessManager processManager(config, log);
    Oam oam;
    SystemModuleTypeConfig systemmoduletypeconfig;
    ModuleTypeConfig PMSmoduletypeconfig;
    ALARMManager aManager;

    int waitTime = 180;

    log.writeLog(__LINE__, "startMgrProcessThread launched", LOG_TYPE_DEBUG);

    //get calpont software version and release
    string localSoftwareInfo = columnstore_version + columnstore_release;
    //get systemStartupOffline
    string systemStartupOffline = "n";

    try
    {
        Config* sysConfig = Config::makeConfig();

        systemStartupOffline = sysConfig->getConfig("Installation", "SystemStartupOffline");
    }
    catch (...)
    {
        log.writeLog(__LINE__, "ERROR: Problem getting systemStartupOffline from the Calpont System Configuration file", LOG_TYPE_ERROR);
        systemStartupOffline = "n";
    }

    if ( systemStartupOffline == "y" )
        log.writeLog(__LINE__, "SystemStartupOffline set to 'y', Not starting up Calpont Database Processes", LOG_TYPE_INFO);

    try
    {
        oam.getSystemConfig(systemmoduletypeconfig);
    }
    catch (exception& ex)
    {
        string error = ex.what();
        log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: " + error, LOG_TYPE_ERROR);
    }
    catch (...)
    {
        log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: Caught unknown exception!", LOG_TYPE_ERROR);
    }

    //get Distributed Install
    string DistributedInstall = "n";

    try
    {
        oam.getSystemConfig("DistributedInstall", DistributedInstall);
    }
    catch (...)
    {
        log.writeLog(__LINE__, "ERROR: get DistributedInstall", LOG_TYPE_ERROR);
    }

    //Send out a start service just to make sure Columnstore is runing on remote nodes
    //note this only works for systems with ssh-keys
    /*	for( unsigned int i = 0 ; i < systemmoduletypeconfig.moduletypeconfig.size(); i++)
    	{
    		int moduleCount = systemmoduletypeconfig.moduletypeconfig[i].ModuleCount;
    		if( moduleCount == 0)
    			continue;

    		DeviceNetworkList::iterator pt = systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.begin();
    		for ( ; pt != systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.end(); pt++)
    		{
    		      //skip OAM Parent module
    		      if ( (*pt).DeviceName == config.moduleName() )
    			      continue;

    		      HostConfigList::iterator pt1 = (*pt).hostConfigList.begin();
    		      for( ; pt1 != (*pt).hostConfigList.end() ; pt1++)
    		      {
    			      //run remote command script
    			      string cmd = startup::StartUp::installDir() + "/bin/remote_command.sh " + (*pt1).IPAddr + " ssh '" + startup::StartUp::installDir() + "/bin/columnstore restart' 0";
    			      system(cmd.c_str());
    		      }
    		}
    	}
    */
    //distribute system and process config files
    processManager.distributeConfigFile("system");
    processManager.distributeConfigFile("system", "ProcessConfig.xml");

    //send out moduleName to remote nodes, this will be used to startup new installed nodes
    {
        int status = API_SUCCESS;
        int k = 0;

        for ( ; k < waitTime ; k++ )
        {
            if ( startsystemthreadStop )
            {
                processManager.setSystemState(oam::MAN_OFFLINE);

                // exit thread
                log.writeLog(__LINE__, "startMgrProcessThread Exit with a stop system flag", LOG_TYPE_DEBUG);
                pthread_exit(0);
            }

            status = API_SUCCESS;

            for ( unsigned int i = 0 ; i < systemmoduletypeconfig.moduletypeconfig.size(); i++)
            {
                int moduleCount = systemmoduletypeconfig.moduletypeconfig[i].ModuleCount;

                if ( moduleCount == 0)
                    continue;

                DeviceNetworkList::iterator pt = systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.begin();

                for ( ; pt != systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.end(); pt++)
                {
                    string moduleName = (*pt).DeviceName;

                    //skip OAM Parent module
                    if ( (*pt).DeviceName == config.moduleName() )
                        continue;

                    if ( (*pt).DisableState == oam::MANDISABLEDSTATE ||
                            (*pt).DisableState == oam::AUTODISABLEDSTATE )
                        continue;

                    int ret = processManager.configureModule(moduleName);

                    if ( ret != API_SUCCESS )
                        status = ret;
                }
            }

            //get out of loop if all modules updated
            if ( status == API_SUCCESS )
                break;

            //retry after sleeping for a bit
            sleep(1);
        }

        if ( k == waitTime || status == API_FAILURE)
        {
            // system didn't successfull restart
            processManager.setSystemState(oam::FAILED);
            // exit thread
            log.writeLog(__LINE__, "startMgrProcessThread Exit with a failure, not all ProcMons running", LOG_TYPE_CRITICAL);
            log.writeLog(__LINE__, "startMgrProcessThread Exit - failure", LOG_TYPE_DEBUG);
            pthread_exit(0);
        }
    }

    //wait until all modules are up after a system reboot
    int i = 0;

    for ( ; i < waitTime ; i++ )
    {
        if ( startsystemthreadStop )
        {
            processManager.setSystemState(oam::MAN_OFFLINE);

            // exit thread
            log.writeLog(__LINE__, "startMgrProcessThread Exit with a stop system flag", LOG_TYPE_DEBUG);
            pthread_exit(0);
        }

        int status = API_SUCCESS;

        for ( unsigned int i = 0 ; i < systemmoduletypeconfig.moduletypeconfig.size(); i++)
        {
            if ( systemmoduletypeconfig.moduletypeconfig[i].ModuleType == "pm" )
                PMSmoduletypeconfig = systemmoduletypeconfig.moduletypeconfig[i];

            int moduleCount = systemmoduletypeconfig.moduletypeconfig[i].ModuleCount;

            if ( moduleCount == 0)
                continue;

            DeviceNetworkList::iterator pt = systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.begin();

            for ( ; pt != systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.end(); pt++)
            {
                string moduleName = (*pt).DeviceName;

                // Is Module UP
                try
                {
                    bool degraded;
                    int opState = oam::ACTIVE;
                    oam.getModuleStatus(moduleName, opState, degraded);

                    if ( opState == oam::MAN_DISABLED )
                        //mark all processes running on module man-offline except ProcMon
                        processManager.setProcessStates(moduleName, oam::MAN_OFFLINE);

                    if ( opState == oam::AUTO_DISABLED)
                        //mark all processes running on module auto-offline
                        processManager.setProcessStates(moduleName, oam::AUTO_OFFLINE);

                    if (opState == oam::INITIAL ||
                            opState == oam::DOWN)
                    {
                        //a module is not up
                        status = API_MINOR_FAILURE;
                        break;
                    }
                }
                catch (exception& ex)
                {
//					string error = ex.what();
//					log.writeLog(__LINE__, "EXCEPTION ERROR on getModuleStatus on module " + moduleName + ": " + error, LOG_TYPE_ERROR);
                }
                catch (...)
                {
//					log.writeLog(__LINE__, "EXCEPTION ERROR on getModuleStatus on module " + moduleName + ": Caught unknown exception!", LOG_TYPE_ERROR);
                }
            }

            if ( status == API_MINOR_FAILURE)
            {
                sleep(1);
                break;
            }
        }

        if ( status == API_SUCCESS)
            //all modules are up
            break;
    }

    if ( i == waitTime )
    {
        // system didn't successfull restart
        processManager.setSystemState(oam::FAILED);

        // exit thread
        log.writeLog(__LINE__, "startMgrProcessThread Exit with a failure, not all modules are UP", LOG_TYPE_CRITICAL);
        pthread_exit(0);
    }

    //configure the PMS settings
    processManager.updatePMSconfig();

    if (HDFS)
        //distribute config file
        processManager.distributeConfigFile("system");

    //now wait until all procmons are ACTIVE and validate rpms on each module
    int status = API_SUCCESS;
    int k = 0;

    for ( ; k < waitTime ; k++ )
    {
        if ( startsystemthreadStop )
        {
            processManager.setSystemState(oam::MAN_OFFLINE);

            // exit thread
            log.writeLog(__LINE__, "startMgrProcessThread Exit with a stop system flag", LOG_TYPE_DEBUG);
            pthread_exit(0);
        }

        status = API_SUCCESS;

        for ( unsigned int i = 0 ; i < systemmoduletypeconfig.moduletypeconfig.size(); i++)
        {
            int moduleCount = systemmoduletypeconfig.moduletypeconfig[i].ModuleCount;

            if ( moduleCount == 0)
                continue;

            DeviceNetworkList::iterator pt = systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.begin();

            for ( ; pt != systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.end(); pt++)
            {
                string moduleName = (*pt).DeviceName;

                if ( (*pt).DisableState == oam::MANDISABLEDSTATE ||
                        (*pt).DisableState == oam::AUTODISABLEDSTATE )
                    continue;

                int moduleOpState = oam::ACTIVE;

                // check module state
                try
                {
                    bool degraded;
                    oam.getModuleStatus(moduleName, moduleOpState, degraded);

                    // if up, set to MAN_INIT
                    if ( HDFS &&
                            (moduleOpState == oam::UP) )
                    {
                        processManager.setModuleState(moduleName, oam::MAN_INIT);
                    }
                }
                catch (exception& ex)
                {
//					string error = ex.what();
//					log.writeLog(__LINE__, "EXCEPTION ERROR on getModuleStatus on module " + moduleName + ": " + error, LOG_TYPE_ERROR);
                }
                catch (...)
                {
//					log.writeLog(__LINE__, "EXCEPTION ERROR on getModuleStatus on module " + moduleName + ": Caught unknown exception!", LOG_TYPE_ERROR);
                }

                // Is Module's ProcMon ACTIVE and module status has been updated
                int opState = oam::ACTIVE;

                try
                {
                    ProcessStatus procstat;
                    oam.getProcessStatus("ProcessMonitor", moduleName, procstat);
                    opState = procstat.ProcessOpState;

                    if (opState != oam::ACTIVE)
                    {
                        //skip if Not ACTIVE
                        log.writeLog(__LINE__, "Module ProcMon not active yet: " + moduleName, LOG_TYPE_DEBUG);
                        status = API_MINOR_FAILURE;
                        continue;
                    }
                }
                catch (exception& ex)
                {
//					string error = ex.what();
//					log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: " + error, LOG_TYPE_ERROR);
                    status = API_MINOR_FAILURE;
                    continue;
                }
                catch (...)
                {
//					log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: Caught unknown exception!", LOG_TYPE_ERROR);
                    status = API_MINOR_FAILURE;
                    continue;
                }

                //skip OAM Parent module
                if ( moduleName == config.moduleName() )
                    continue;

                //ProcMon ACTIVE, validate the software release and version of that module
                ByteStream msg;
                ByteStream::byte requestID = GETSOFTWAREINFO;
                msg << requestID;

                string moduleSoftwareInfo = processManager.sendMsgProcMon1( moduleName, msg, requestID );

                if ( moduleSoftwareInfo == "FAILED" )
                    continue;

                if ( localSoftwareInfo != moduleSoftwareInfo )
                {
                    // module not running on same Calpont Software build as this local Director
                    // alarm and fail the module
                    log.writeLog(__LINE__, "Software Version mismatch : " + moduleName + "/" + localSoftwareInfo + "/" + moduleSoftwareInfo, LOG_TYPE_CRITICAL);

                    aManager.sendAlarmReport(moduleName.c_str(), INVALID_SW_VERSION, SET);
                    processManager.setModuleState(moduleName, oam::FAILED);
                    status = API_FAILURE;
                    break;
                }
            }
        }

        //get out of loop if all modules ACTIVE or MAN_OFFLINE
        if ( status == API_SUCCESS )
        {
            if ( systemStartupOffline == "y" )
            {
                processManager.setSystemState(oam::MAN_OFFLINE);
                log.writeLog(__LINE__, "SystemStartupOffline set to 'y', Not starting up Calpont Database Processes", LOG_TYPE_DEBUG);
            }

            break;
        }
        else
        {
            //get out of loop if start module failed
            if ( status == API_FAILURE )
                break;

            //retry after sleeping for a bit
            sleep(1);
        }
    }

    if ( k == waitTime || status == API_FAILURE)
    {
        // system didn't successfully restart
        processManager.setSystemState(oam::FAILED);
        // exit thread
        log.writeLog(__LINE__, "startMgrProcessThread Exit with a failure, not all ProcMons ACTIVE", LOG_TYPE_CRITICAL);
        log.writeLog(__LINE__, "startMgrProcessThread Exit - failure", LOG_TYPE_DEBUG);
        pthread_exit(0);
    }
    else
    {
        //distribute config file
//		processManager.distributeConfigFile("system");

        if ( systemStartupOffline == "n" && status == API_SUCCESS )
        {
            oam::DeviceNetworkList devicenetworklist;
            pthread_t startsystemthread;
            int status = pthread_create (&startsystemthread, NULL, (void* (*)(void*)) &startSystemThread, &devicenetworklist);

            if ( status != 0 )
            {
                log.writeLog(__LINE__, "STARTSYSTEMS: pthread_create failed, return status = " + oam.itoa(status));
                status = API_FAILURE;
            }

            if (status == 0)
            {
                pthread_join(startsystemthread, NULL);
                status = startsystemthreadStatus;
            }

            if ( status != API_SUCCESS )
            {
                // system didn't successfully restart
                processManager.setSystemState(oam::FAILED);
                log.writeLog(__LINE__, "startMgrProcessThread Exit with a failure, error returned from startSystemThread", LOG_TYPE_CRITICAL);
            }
            else
                //distribute config file
                processManager.distributeConfigFile("system");
        }
    }

    // exit thread
    log.writeLog(__LINE__, "startMgrProcessThread Exit", LOG_TYPE_DEBUG);
    pthread_exit(0);
}


/*****************************************************************************************
* @brief	pingDeviceThread
*
* purpose:	perform ping testing on the devices within the system
*
*****************************************************************************************/
void pingDeviceThread()
{
    ProcessLog log;
    Configuration config;
    ProcessManager processManager(config, log);
    Oam oam;
    ModuleTypeConfig moduletypeconfig;
    ALARMManager aManager;
    BRM::DBRM dbrm;

    log.writeLog(__LINE__, "pingDeviceThread launched", LOG_TYPE_DEBUG);

    string cmdLine = "ping ";
    string cmdOption = " -c 1 -w 5 >> /dev/null";
    string cmd;
    string deviceIP;

    //
    // Get Module Info
    //
    SystemModuleTypeConfig systemModuleTypeConfig;

    try
    {
        oam.getSystemConfig(systemModuleTypeConfig);
    }
    catch (exception& ex)
    {
        string error = ex.what();
        log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: " + error, LOG_TYPE_ERROR);
    }
    catch (...)
    {
        log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: Caught unknown exception!", LOG_TYPE_ERROR);
    }

    //Build the initial list, clear module state

    for ( unsigned int i = 0 ; i < systemModuleTypeConfig.moduletypeconfig.size(); i++)
    {
        int moduleCount = systemModuleTypeConfig.moduletypeconfig[i].ModuleCount;

        if ( moduleCount == 0 )
            // skip if no modules configured
            continue;

        DeviceNetworkList::iterator pt = systemModuleTypeConfig.moduletypeconfig[i].ModuleNetworkList.begin();

        for ( ; pt != systemModuleTypeConfig.moduletypeconfig[i].ModuleNetworkList.end() ; pt++)
        {
            moduleInfoList.insert(moduleList::value_type((*pt).DeviceName, 0));
        }
    }

    typedef   map<string, int>	nicList;
    nicList	nicInfoList;

    //Build the initial list, clear NIC state

    for ( unsigned int i = 0 ; i < systemModuleTypeConfig.moduletypeconfig.size(); i++)
    {
        int moduleCount = systemModuleTypeConfig.moduletypeconfig[i].ModuleCount;

        if ( moduleCount == 0 )
            // skip if no modules configured
            continue;

        DeviceNetworkList::iterator pt = systemModuleTypeConfig.moduletypeconfig[i].ModuleNetworkList.begin();

        for ( ; pt != systemModuleTypeConfig.moduletypeconfig[i].ModuleNetworkList.end() ; pt++)
        {

            HostConfigList::iterator pt1 = (*pt).hostConfigList.begin();

            for ( ; pt1 != (*pt).hostConfigList.end() ; pt1++ )
            {
                nicInfoList.insert(moduleList::value_type((*pt1).HostName, 0));
            }
        }
    }

    //
    // Get ext device info
    //
    SystemExtDeviceConfig systemextdeviceconfig;

    try
    {
        oam.getSystemConfig(systemextdeviceconfig);
    }
    catch (exception& ex)
    {
        string error = ex.what();
        log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: " + error, LOG_TYPE_ERROR);
    }
    catch (...)
    {
//		log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: Caught unknown exception!", LOG_TYPE_ERROR);
    }

    typedef   map<string, int>	extDeviceList;
    extDeviceList	extDeviceInfoList;

    //Build the initial list, clear ext device state

    for ( unsigned int i = 0 ; i < systemextdeviceconfig.Count; i++)
    {
        string name = systemextdeviceconfig.extdeviceconfig[i].Name;
        extDeviceInfoList.insert(extDeviceList::value_type(name, 0));
    }

    //storage config
    string DBRootStorageType;

    try
    {
        oam.getSystemConfig( "DBRootStorageType", DBRootStorageType);
    }
    catch (...) {}

    log.writeLog(__LINE__, "pingDeviceThread: DBRootStorageType = " + DBRootStorageType, LOG_TYPE_DEBUG);

    int rtnCode = 0;
    Configuration configData;
    SystemStatus systemstatus;

    bool enableModuleMonitor = true;

    bool LANOUTAGEACTIVE = false;
    bool HOTSTANDBYACTIVE = false;
    bool downActiveOAMModule = false;

    // monitor module and external device loop

    while (true)
    {
        //don't perform module test if system is MAN_OFFLINE or system status cannot be retrieved
        while (true)
        {
            SystemStatus systemstatus;

            try
            {
                oam.getSystemStatus(systemstatus);

                if (systemstatus.SystemOpState == oam::MAN_OFFLINE )
                    sleep(5);
                else
                    break;
            }
            catch (...)
            {
                sleep(5);
            }
        }

        // Module Heartbeat period and failure count
        int ModuleHeartbeatPeriod;
        int ModuleHeartbeatCount;

        try
        {
            oam.getSystemConfig("ModuleHeartbeatPeriod", ModuleHeartbeatPeriod);
            oam.getSystemConfig("ModuleHeartbeatCount", ModuleHeartbeatCount);
            ModuleHeartbeatPeriod = ModuleHeartbeatPeriod * 10;
        }
        catch (exception& ex)
        {
            string error = ex.what();
            log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: " + error, LOG_TYPE_ERROR);
            sleep(5);
            continue;
        }
        catch (...)
        {
            log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: Caught unknown exception!", LOG_TYPE_ERROR);
            sleep(5);
            continue;
        }

        // skip testing if Heartbeat is disabled
        if ( ModuleHeartbeatPeriod <= 0 )
        {
            if ( enableModuleMonitor )
                log.writeLog(__LINE__, "ModuleHeartbeatPeriod set to disabled", LOG_TYPE_DEBUG);

            enableModuleMonitor = false;
        }
        else
        {
            if ( !enableModuleMonitor && moduleInfoList.size() > 1 )
                log.writeLog(__LINE__, "ModuleHeartbeatPeriod set to enabled", LOG_TYPE_DEBUG);

            enableModuleMonitor = true;
        }

        //single server system
        if ( moduleInfoList.size() <= 1)
            enableModuleMonitor = false;

        //
        // ping NIC
        //

        // read each time to catch updates
        pthread_mutex_lock(&THREAD_LOCK);
        systemModuleTypeConfig.moduletypeconfig.clear();

        try
        {
            oam.getSystemConfig(systemModuleTypeConfig);
        }
        catch (exception& ex)
        {
            string error = ex.what();
            log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: " + error, LOG_TYPE_ERROR);
            sleep(5);
            continue;
        }
        catch (...)
        {
            log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: Caught unknown exception!", LOG_TYPE_ERROR);
            sleep(5);
            continue;
        }

        pthread_mutex_unlock(&THREAD_LOCK);

        bool LANOUTAGESUPPORT = true;
        bool LOCALNICDOWN = false;

        if (enableModuleMonitor)
        {
            //test main local Ethernet interface status
            for ( int count = 0 ; ; count ++)
            {
                int sockfd;
                struct ifreq ifr;

                sockfd = socket(AF_INET, SOCK_DGRAM, 0);

                if (sockfd == -1)
                {
                    log.writeLog(__LINE__, "Could not get socket to check", LOG_TYPE_ERROR);
                    close(sockfd);
                    break;
                }

                /* get interface name */
                strncpy(ifr.ifr_name, iface_name.c_str(), IFNAMSIZ);

                /* Read interface flags */
                if (ioctl(sockfd, SIOCGIFFLAGS, &ifr) < 0)
                {
                    // not supported
                    close(sockfd);
                    break;
                }

                if (ifr.ifr_flags & IFF_UP)
                {
                    // ethernet port is up, continue on
                    close(sockfd);
                    break;
                }
                else
                {
                    // ethernet port is down
                    log.writeLog(__LINE__, "NIC #1 is DOWN", LOG_TYPE_WARNING);

                    if ( count >= ModuleHeartbeatCount )
                    {
                        LOCALNICDOWN = true;
                        close(sockfd);
                        break;
                    }
                    else
                        sleep(5);
                }

                close(sockfd);
            }
        }

        // if the NIC is down, go directly to LAN outage processing
        if ( !LOCALNICDOWN )
        {
            for ( unsigned int i = 0 ; i < systemModuleTypeConfig.moduletypeconfig.size(); i++)
            {
                int moduleCount = systemModuleTypeConfig.moduletypeconfig[i].ModuleCount;

                if ( moduleCount == 0)
                    continue;

                DeviceNetworkList::iterator pt = systemModuleTypeConfig.moduletypeconfig[i].ModuleNetworkList.begin();

                for ( ; pt != systemModuleTypeConfig.moduletypeconfig[i].ModuleNetworkList.end() ; pt++)
                {
                    string moduleName = (*pt).DeviceName;
                    string ipAddr;
                    string hostName;
                    int moduleState = oam::INITIAL;
                    HostConfigList::iterator pt1 = (*pt).hostConfigList.begin();

                    for ( ; pt1 != (*pt).hostConfigList.end() ; pt1++ )
                    {
                        ipAddr = (*pt1).IPAddr;
                        hostName = (*pt1).HostName;

                        if (enableModuleMonitor)
                        {
                            // perform ping test
                            cmd = cmdLine + ipAddr + cmdOption;
                            rtnCode = system(cmd.c_str());
                            rtnCode = WEXITSTATUS(rtnCode);
                        }
                        else
                            rtnCode = 0;

                        int currentNICState = oam::UP;

                        try
                        {
                            oam.getNICStatus(hostName, currentNICState);
                        }
                        catch (exception& ex)
                        {
//							string error = ex.what();
//							log.writeLog(__LINE__, "EXCEPTION ERROR on getNICStatus: " + error, LOG_TYPE_ERROR);
                        }
                        catch (...)
                        {
//							log.writeLog(__LINE__, "EXCEPTION ERROR on getNICStatus: Caught unknown exception!", LOG_TYPE_ERROR);
                        }

                        switch (rtnCode)
                        {
                            case 0:

                                //NIC Ack ping
                                if ( currentNICState != oam::UP )
                                {
                                    processManager.setNICState(hostName, oam::UP);

                                    if ( ModuleHeartbeatPeriod > 0 )
                                        //Clear an alarm
                                        aManager.sendAlarmReport(hostName.c_str(), NIC_DOWN_AUTO, CLEAR);
                                }

                                //set LAN Outage indicator to false since a module is responding
                                if ( moduleState == oam::INITIAL)
                                    if ( moduleName != config.moduleName())
                                        LANOUTAGESUPPORT = false;

                                //set Module State
                                if ( moduleState == oam::INITIAL || moduleState == oam::UP)
                                    moduleState = oam::UP;

                                break;

                            default:

                                //NIC failed to respond to ping
                                if ( currentNICState != oam::DOWN )
                                {
                                    log.writeLog(__LINE__, "NIC failed to respond to ping: " + hostName, LOG_TYPE_WARNING);
                                    processManager.setNICState(hostName, oam::DOWN);

                                    if ( ModuleHeartbeatPeriod > 0 )
                                        //Issue an alarm
                                        aManager.sendAlarmReport(hostName.c_str(), NIC_DOWN_AUTO, SET);
                                }

                                //set Module State
                                if ( moduleState == oam::INITIAL || moduleState == oam::DOWN)
                                    moduleState = oam::DOWN;
                                else
                                    // NIC 1 is up and NIC 2 is down
                                    moduleState = oam::DEGRADED;

                                break;
                        }
                    }

                    // if disabled, default module state to up
                    if (!enableModuleMonitor)
                        moduleState = oam::UP;

                    // moduleState coming out of the NIC monitoring loop
                    // UP - ALL NICs passed ping test
                    // DEGRADED - NIC 1 passed, NIC 2 failed ping test
                    // DOWN - NIC 1 or ALL NICs failed ping test

                    int opState = oam::ACTIVE;

                    try
                    {
                        bool degraded;
                        oam.getModuleStatus(moduleName, opState, degraded);
                    }
                    catch (exception& ex)
                    {
//						string error = ex.what();
//						log.writeLog(__LINE__, "EXCEPTION ERROR on getModuleStatus on module " + moduleName + ": " + error, LOG_TYPE_ERROR);
                    }
                    catch (...)
                    {
//						log.writeLog(__LINE__, "EXCEPTION ERROR on getModuleStatus on module " + moduleName + ": Caught unknown exception!", LOG_TYPE_ERROR);
                    }

                    // skip module check if not in use or in FAILED state
                    if (opState == oam::MAN_OFFLINE ||
                            opState == oam::MAN_DISABLED ||
                            opState == oam::FAILED)
                        continue;

                    //fast track a restart of a downed failover module
                    if ( gdownActiveOAMModule == moduleName )
                    {
                        moduleInfoList[moduleName] = ModuleHeartbeatCount - 1;
                        gdownActiveOAMModule.clear();
                        moduleState = oam::DOWN;
                        downActiveOAMModule = true;
                    }

                    vector<string>::iterator pt2 = downModuleList.begin();

                    for ( ; pt2 != downModuleList.end() ; pt2++)
                    {
                        if ( *pt2 == moduleName )
                        {
                            moduleInfoList[moduleName] = ModuleHeartbeatCount - 1;
                            moduleState = oam::DOWN;
                            downModuleList.erase(pt2);
                            break;
                        }
                    }

                    switch (moduleState)
                    {
                        case oam::DEGRADED:
                            // do nothing for now
                            break;

                        case oam::UP:

// comment out, only come up when both nic are up, if not the pms list will not have the second nic in there
//						case oam::DEGRADED:
                            if (opState == oam::DOWN || opState == oam::INITIAL
                                    || opState == oam::AUTO_DISABLED)
                            {
                                //Set the module state to up
                                processManager.setModuleState(moduleName, moduleState);
                            }

                            if ( moduleName == config.OAMStandbyName() )
                                HOTSTANDBYACTIVE = true;

                            // if LAN OUTAGE ACTIVE, skip module checks
                            if (LANOUTAGEACTIVE)
                                break;

                            try
                            {
                                oam.getSystemConfig("MySQLRep", MySQLRep);
                            }
                            catch (...)
                            {
                                MySQLRep = "n";
                            }

                            if (moduleInfoList[moduleName] >= ModuleHeartbeatCount ||
                                    opState == oam::DOWN || opState == oam::AUTO_DISABLED)
                            {
								log.writeLog(__LINE__, "*** Module alive, bring it back online: " + moduleName, LOG_TYPE_DEBUG);

                                string PrimaryUMModuleName = config.moduleName();

                                try
                                {
                                    oam.getSystemConfig("PrimaryUMModuleName", PrimaryUMModuleName);
                                }
                                catch (...) {}

                                bool busy = false;

                                for ( int retry = 0 ; retry < 20 ; retry++ )
                                {
                                    busy = false;
                                    ProcessStatus DMLprocessstatus;

                                    try
                                    {
                                        oam.getProcessStatus("DMLProc", PrimaryUMModuleName, DMLprocessstatus);

                                        if ( DMLprocessstatus.ProcessOpState == oam::BUSY_INIT)
                                        {
                                            log.writeLog(__LINE__, "DMLProc in BUSY_INIT, skip bringing module online " + moduleName, LOG_TYPE_DEBUG);
                                            busy = true;
                                            sleep(5);
                                        }
                                        else
                                            break;
                                    }
                                    catch (...)
                                    {
                                        sleep(5);
                                    }
                                }

                                if (busy)
                                    break;

                                //set query system state not ready
                                processManager.setQuerySystemState(false);

                                processManager.setSystemState(oam::BUSY_INIT);

                                processManager.reinitProcessType("cpimport");

                                // halt the dbrm
                                oam.dbrmctl("halt");
                                log.writeLog(__LINE__, "'dbrmctl halt' done", LOG_TYPE_DEBUG);

                                aManager.sendAlarmReport(moduleName.c_str(), MODULE_DOWN_AUTO, CLEAR);

                                //send notification
                                oam.sendDeviceNotification(config.moduleName(), MODULE_UP);

                                int status;
                                DBRootConfigList dbrootConfigList;

                                // if shared pm, move dbroots back to pm
                                if ( ( moduleName.find("pm") == 0 && !amazon && ( DBRootStorageType != "internal") ) ||
                                        ( moduleName.find("pm") == 0 && amazon && downActiveOAMModule ) ||
                                        ( moduleName.find("pm") == 0 && amazon && AmazonPMFailover == "y") )
                                {

                                    //restart to get the versionbuffer files closed so it can be unmounted
                                    processManager.restartProcessType("WriteEngineServer", moduleName);

                                    //set module to enable state
                                    processManager.enableModule(moduleName, oam::AUTO_OFFLINE, true);

                                    downActiveOAMModule = false;
                                    int retry;

                                    for ( retry = 0 ; retry < 5 ; retry++ )
                                    {
                                        try
                                        {
                                            log.writeLog(__LINE__, "Call autoUnMovePmDbroot", LOG_TYPE_DEBUG);
                                            oam.autoUnMovePmDbroot(moduleName);

                                            //check if any dbroots got assigned back to this module
                                            // they could not be moved if they were busy on other pms
                                            try
                                            {
                                                int moduleID = atoi(moduleName.substr(MAX_MODULE_TYPE_SIZE, MAX_MODULE_ID_SIZE).c_str());
                                                oam.getPmDbrootConfig(moduleID, dbrootConfigList);

                                                if (  dbrootConfigList.size() == 0 )
                                                {
                                                    // no dbroots, fail module
                                                    log.writeLog(__LINE__, "autoUnMovePmDbroot left no dbroots mounted, failing module restart: " + moduleName, LOG_TYPE_WARNING);

                                                    //Issue an alarm
                                                    aManager.sendAlarmReport(moduleName.c_str(), MODULE_DOWN_AUTO, SET);

                                                    //set module to disable state
                                                    processManager.disableModule(moduleName, true);

                                                    //call dbrm control
                                                    oam.dbrmctl("reload");
                                                    log.writeLog(__LINE__, "'dbrmctl reload' done", LOG_TYPE_DEBUG);

                                                    // resume the dbrm
                                                    oam.dbrmctl("resume");
                                                    log.writeLog(__LINE__, "'dbrmctl resume' done", LOG_TYPE_DEBUG);

                                                    //clear count
                                                    moduleInfoList[moduleName] = 0;

                                                    processManager.setSystemState(oam::ACTIVE);

                                                    //set query system state ready
                                                    processManager.setQuerySystemState(true);

													goto break_case;
                                                }
                                            }
                                            catch (...)
                                            {}

                                            log.writeLog(__LINE__, "autoUnMovePmDbroot success", LOG_TYPE_DEBUG);

                                            //distribute config file
                                            processManager.distributeConfigFile("system");

                                            break;
                                        }
                                        catch (...)
                                        {
                                            sleep(5);
                                        }
                                    }

                                    if ( retry == 5 )
                                    {
                                        log.writeLog(__LINE__, "autoUnMovePmDbroot: Failed. Fail Module", LOG_TYPE_WARNING);
										log.writeLog(__LINE__, "System DBRM READ ONLY - Verify dbroot mounts.", LOG_TYPE_WARNING);
                                        //Issue an alarm
                                        aManager.sendAlarmReport(moduleName.c_str(), MODULE_DOWN_AUTO, SET);

                                        //set module to disable state
                                        processManager.disableModule(moduleName, true);

										// Need to do something here to verify data mounts before resuming
										// Best to assume if we reach this you need to put into readonly and verify all dbroots are mounted

										//call dbrm control
										oam.dbrmctl("readonly");
										log.writeLog(__LINE__, "'dbrmctl readonly' done", LOG_TYPE_DEBUG);

                                        //clear count
                                        moduleInfoList[moduleName] = 0;

										processManager.setSystemState(oam::DEGRADED);

                                        //set query system state ready
                                        processManager.setQuerySystemState(true);

                                        break;
                                    }
                                }
                                else
                                    //set module to enable state
                                    processManager.enableModule(moduleName, oam::AUTO_OFFLINE, true);

                                //restart module processes
                                int retry = 0;

                                int ModuleProcMonWaitCount = 12;

                                try
                                {
                                    oam.getSystemConfig("ModuleProcMonWaitCount", ModuleProcMonWaitCount);
                                }
                                catch (...)
                                {
                                    ModuleProcMonWaitCount = 12;
                                }

                                for ( ; retry < ModuleProcMonWaitCount ; retry ++ )
                                {
                                    // first, wait until module's ProcMon is ACTIVE
                                    int opState = oam::ACTIVE;

                                    try
                                    {
                                        ProcessStatus procstat;
                                        oam.getProcessStatus("ProcessMonitor", moduleName, procstat);
                                        opState = procstat.ProcessOpState;

                                        if (opState != oam::ACTIVE)
                                        {
                                            log.writeLog(__LINE__, "Waiting for Module ProcMon to go ACTIVE: " + moduleName, LOG_TYPE_DEBUG);
                                            sleep(5);
                                            continue;
                                        }
                                    }
                                    catch (exception& ex)
                                    {
//										string error = ex.what();
//										log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: " + error, LOG_TYPE_ERROR);
                                        sleep(5);
                                        continue;
                                    }
                                    catch (...)
                                    {
//										log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: Caught unknown exception!", LOG_TYPE_ERROR);
                                        sleep(5);
                                        continue;
                                    }

                                    //check and assign Elastic IP Address
                                    int AmazonElasticIPCount = 0;

                                    try
                                    {
                                        oam.getSystemConfig("AmazonElasticIPCount", AmazonElasticIPCount);
                                    }
                                    catch (...)
                                    {
                                        AmazonElasticIPCount = 0;
                                    }

                                    for ( int id = 1 ; id < AmazonElasticIPCount + 1 ; id++ )
                                    {
                                        string AmazonElasticModule = "AmazonElasticModule" + oam.itoa(id);
                                        string ELmoduleName;

                                        try
                                        {
                                            oam.getSystemConfig(AmazonElasticModule, ELmoduleName);
                                        }
                                        catch (...) {}

                                        if ( ELmoduleName == moduleName )
                                        {
                                            //match found assign Elastic IP Address
                                            string AmazonElasticIPAddr = "AmazonElasticIPAddr" + oam.itoa(id);
                                            string ELIPaddress;

                                            try
                                            {
                                                oam.getSystemConfig(AmazonElasticIPAddr, ELIPaddress);
                                            }
                                            catch (...) {}

                                            try
                                            {
                                                oam.assignElasticIP(hostName, ELIPaddress);
                                                log.writeLog(__LINE__, "Set Elastic IP Address: " + hostName + "/" + ELIPaddress, LOG_TYPE_DEBUG);
                                            }
                                            catch (...)
                                            {
                                                log.writeLog(__LINE__, "Failed to Set Elastic IP Address: " + hostName + "/" + ELIPaddress, LOG_TYPE_ERROR);
                                            }

                                            break;
                                        }
                                    }

                                    // next, stopmodule to start up clean
                                    status = processManager.stopModule(moduleName, oam::FORCEFUL, false);

                                    if ( status == oam::API_SUCCESS )
                                    {
                                        string newStandbyModule = processManager.getStandbyModule();

                                        if ( !newStandbyModule.empty() && newStandbyModule != "NONE")
                                        {
                                            processManager.setStandbyModule(newStandbyModule);
                                        }
                                        else
                                        {
                                            if ( newStandbyModule == "NONE")
                                                if ( moduleName.substr(0, MAX_MODULE_TYPE_SIZE) == "pm" )
                                                    processManager.setStandbyModule(moduleName);
                                        }

                                        if ((moduleName.find("pm") == 0) && (dbrootConfigList.size() > 0))
                                        {
                                            DBRootConfigList::iterator pt = dbrootConfigList.begin();

                                            if (( DBRootStorageType == "DataRedundancy") && (*pt == 1))
                                            {
                                                log.writeLog(__LINE__, "stopModule, " + config.moduleName(), LOG_TYPE_DEBUG);
                                                processManager.stopModule(config.moduleName(), oam::FORCEFUL, false);
                                                processManager.switchParentOAMModule(moduleName);
                                                processManager.stopProcess(config.moduleName(), "ProcessManager", oam::FORCEFUL, true);
                                                break;
                                            }
                                        }
                                    }
                                    else
                                    {
                                        //stop failed, retry
                                        log.writeLog(__LINE__, "stopModule, failed will retry: " + moduleName, LOG_TYPE_DEBUG);
                                        sleep(5);
                                        continue;
                                    }

                                    // next, startmodule
                                    status = processManager.startModule(moduleName, oam::FORCEFUL, oam::AUTO_OFFLINE);

                                    if ( status == oam::API_SUCCESS )
                                        break;

                                    log.writeLog(__LINE__, "startModule, failed will retry: " + moduleName, LOG_TYPE_DEBUG);

                                    //sleep and retry all over again
                                    sleep (5);
                                } // end of the retry loop

                                if ( retry < ModuleProcMonWaitCount )
                                {
                                    // module successfully started

                                    //call dbrm control, need to resume before start so the getdbrmfiles halt doesn't hang
                                    oam.dbrmctl("reload");
                                    log.writeLog(__LINE__, "'dbrmctl reload' done", LOG_TYPE_DEBUG);

                                    // resume the dbrm
                                    oam.dbrmctl("resume");
                                    log.writeLog(__LINE__, "'dbrmctl resume' done", LOG_TYPE_DEBUG);

                                    //set recycle process
                                    processManager.recycleProcess(moduleName);

                                    //distribute config file
                                    processManager.distributeConfigFile("system");
                                    sleep(1);

                                    string moduleType = moduleName.substr(0, MAX_MODULE_TYPE_SIZE);

                                    // When MySQLRep is "y", replication is set up again for the restarted
                                    // module if it is a um, or a pm on a combined DM/UM/PM install, or a pm
                                    // with PMwithUM set to "y".
                                    if ( MySQLRep == "y" )
                                    {
                                        // ServerInstallType() is only consulted for pm modules, as before
                                        const bool pmQualifies = ( moduleType == "pm" ) &&
                                                ( config.ServerInstallType() == oam::INSTALL_COMBINE_DM_UM_PM ||
                                                  PMwithUM == "y" );

                                        if ( moduleType == "um" || pmQualifies )
                                        {
                                            //setup MySQL Replication for started modules
                                            log.writeLog(__LINE__, "Setup MySQL Replication for module recovering from outage on " + moduleName, LOG_TYPE_DEBUG);

                                            // single-entry device list naming just this module
                                            DeviceNetworkList recoveredModules;
                                            DeviceNetworkConfig recoveredModule;
                                            recoveredModule.DeviceName = moduleName;
                                            recoveredModules.push_back(recoveredModule);

                                            processManager.setMySQLReplication(recoveredModules, oam::UnassignedName, true);
                                        }
                                    }

                                    //set query system state ready
                                    processManager.setQuerySystemState(true);

                                    processManager.setSystemState(oam::ACTIVE);
                                    //clear count
                                    moduleInfoList[moduleName] = 0;
                                }
                                else
                                {
                                    // module failed to restart, place back in disabled state
                                    //Log failure, issue alarm, set moduleOpState
                                    Configuration config;

                                    //Issue an alarm
                                    aManager.sendAlarmReport(moduleName.c_str(), MODULE_DOWN_AUTO, SET);

                                    // if pm, move dbroots back to pm
                                    if ( ( moduleName.find("pm") == 0 && !amazon && ( DBRootStorageType != "internal") ) ||
                                            ( moduleName.find("pm") == 0 && amazon && downActiveOAMModule ) ||
                                            ( moduleName.find("pm") == 0 && amazon && AmazonPMFailover == "y") )
                                    {
                                        //move dbroots to other modules
                                        try
                                        {
                                            log.writeLog(__LINE__, "Call autoMovePmDbroot", LOG_TYPE_DEBUG);
                                            oam.autoMovePmDbroot(moduleName);
                                            log.writeLog(__LINE__, "autoMovePmDbroot success", LOG_TYPE_DEBUG);
                                            //distribute config file
                                            processManager.distributeConfigFile("system");
                                        }
                                        catch (exception& ex)
                                        {
                                            string error = ex.what();
                                            log.writeLog(__LINE__, "EXCEPTION ERROR on autoMovePmDbroot: " + error, LOG_TYPE_DEBUG);
                                        }
                                        catch (...)
                                        {
                                            log.writeLog(__LINE__, "EXCEPTION ERROR on autoMovePmDbroot: Caught unknown exception!", LOG_TYPE_ERROR);
                                        }
                                    }

                                    //set module to disable state
                                    processManager.disableModule(moduleName, true);

                                    //call dbrm control
                                    oam.dbrmctl("reload");
                                    log.writeLog(__LINE__, "'dbrmctl reload' done", LOG_TYPE_DEBUG);

                                    // resume the dbrm
                                    oam.dbrmctl("resume");
                                    log.writeLog(__LINE__, "'dbrmctl resume' done", LOG_TYPE_DEBUG);

                                    log.writeLog(__LINE__, "Module failed to auto start: " + moduleName, LOG_TYPE_CRITICAL);

                                    if ( amazon )
                                        processManager.setSystemState(oam::FAILED);
                                    else
                                        processManager.setSystemState(oam::ACTIVE);

                                    //set query system state ready
                                    processManager.setQuerySystemState(true);

                                    //clear count
                                    moduleInfoList[moduleName] = 0;
                                }
                            }

                            break;

                        case oam::DOWN:

                            // a module still in its initial state is skipped
                            if (opState == oam::INITIAL)
                                break;

                            // an auto-disabled module is skipped unless this is amazon AND
                            // the EC2 lookup for its host reports "terminated"
                            if (opState == oam::AUTO_DISABLED)
                            {
                                if (!amazon)
                                    break;

                                // return values = 'ip address' for running or rebooting, stopped or terminated
                                const string instanceAddr = oam.getEC2InstanceIpAddress(hostName);

                                if (instanceAddr != "terminated")
                                    break;
                            }

                            log.writeLog(__LINE__, "module failed to respond to pings: " + moduleName, LOG_TYPE_WARNING);

                            //bump module ping failure counter
                            moduleInfoList[moduleName]++;

                            if ( moduleName == config.OAMStandbyName() )
                                HOTSTANDBYACTIVE = false;

                            if (moduleInfoList[moduleName] == ModuleHeartbeatCount)
                            {
                                // if LAN OUTAGE ACTIVE,skip module checks
                                if (LANOUTAGEACTIVE)
                                    break;

                                //check if down module is PrimaryUMModuleName
                                bool downPrimaryUM = false;
                                string PrimaryUMModuleName;

                                try
                                {
                                    oam.getSystemConfig("PrimaryUMModuleName", PrimaryUMModuleName);
                                }
                                catch (...) {}

                                if ( PrimaryUMModuleName == moduleName )
                                    downPrimaryUM = true;

                                // if disabled, skip
                                if (opState != oam::AUTO_DISABLED )
                                {
                                    //Log failure, issue alarm, set moduleOpState
                                    Configuration config;
									log.writeLog(__LINE__, "*** module is down: " + moduleName, LOG_TYPE_CRITICAL);

                                    //set query system state not ready
                                    processManager.setQuerySystemState(false);

                                    processManager.setSystemState(oam::BUSY_INIT);

                                    processManager.reinitProcessType("cpimport");

                                    // halt the dbrm
                                    oam.dbrmctl("halt");
                                    log.writeLog(__LINE__, "'dbrmctl halt' done", LOG_TYPE_DEBUG);

                                    processManager.setSystemState(oam::BUSY_INIT);

                                    //string cmd = "/etc/init.d/glusterd restart > /dev/null 2>&1";
                                    //system(cmd.c_str());

                                    //send notification
                                    oam.sendDeviceNotification(moduleName, MODULE_DOWN);

                                    //Issue an alarm
                                    aManager.sendAlarmReport(moduleName.c_str(), MODULE_DOWN_AUTO, SET);

                                    //mark all processes running on module auto-offline
                                    processManager.setProcessStates(moduleName, oam::AUTO_OFFLINE);

                                    //set module to disable state
                                    processManager.disableModule(moduleName, false);

                                    //call dbrm control
                                    oam.dbrmctl("reload");
                                    log.writeLog(__LINE__, "'dbrmctl reload' done", LOG_TYPE_DEBUG);

                                    // if pm, move dbroots to other pms
                                    if ( ( moduleName.find("pm") == 0 && !amazon && ( DBRootStorageType != "internal") ) ||
                                            ( moduleName.find("pm") == 0 && amazon && downActiveOAMModule ) ||
                                            ( moduleName.find("pm") == 0 && amazon && AmazonPMFailover == "y") )
                                    {
                                        // Message of the exception thrown by autoMovePmDbroot, if any.
                                        // It is compared with API_DETACH_FAILURE after the try block, so
                                        // the handler must assign to THIS variable; declaring a new local
                                        // there would shadow it and the detach-failure branch below
                                        // would never see the message.
                                        string error;

                                        try
                                        {
                                            log.writeLog(__LINE__, "Call autoMovePmDbroot", LOG_TYPE_DEBUG);
                                            oam.autoMovePmDbroot(moduleName);
                                            log.writeLog(__LINE__, "autoMovePmDbroot success", LOG_TYPE_DEBUG);
                                            //distribute config file
                                            processManager.distributeConfigFile("system");
                                        }
                                        catch (exception& ex)
                                        {
                                            error = ex.what();
                                            log.writeLog(__LINE__, "EXCEPTION ERROR on autoMovePmDbroot: " + error, LOG_TYPE_DEBUG);
                                        }
                                        catch (...)
                                        {
                                            log.writeLog(__LINE__, "EXCEPTION ERROR on autoMovePmDbroot: Caught unknown exception!", LOG_TYPE_ERROR);
                                        }

                                        // detach failure: set the module state to AUTO_DISABLED, resume
                                        // dbrm and query processing, and skip the rest of this case
                                        if ( error == oam.itoa(oam::API_DETACH_FAILURE) )
                                        {
                                            processManager.setModuleState(moduleName, oam::AUTO_DISABLED);

                                            // resume the dbrm
                                            oam.dbrmctl("resume");
                                            log.writeLog(__LINE__, "'dbrmctl resume' done", LOG_TYPE_DEBUG);

                                            //set query system state ready
                                            processManager.setQuerySystemState(true);

                                            break;
                                        }
                                    }
                                }

                                // if Cloud Instance
                                // state = terminate, remove/addmodule to launch new instance
                                if ( amazon )
                                {
                                    if ( moduleName.find("um") == 0 )
                                    {
                                        // resume the dbrm
                                        oam.dbrmctl("resume");
                                        log.writeLog(__LINE__, "'dbrmctl resume' done", LOG_TYPE_DEBUG);
                                    }

                                    // return values = 'ip address' for running or rebooting, stopped or terminated
                                    string currentIPAddr = oam.getEC2InstanceIpAddress(hostName);

                                    if ( currentIPAddr == "terminated")
                                    {
                                        //check if down module was Standby OAM, if so find another one
                                        if ( moduleName == config.OAMStandbyName() )
                                        {

                                            //set down module ProcessManager to AOS
                                            processManager.setProcessState(moduleName, "ProcessManager", oam::AUTO_OFFLINE, 0);

                                            //get another standby OAM module
                                            string newStandbyModule = processManager.getStandbyModule();

                                            //send message to start new Standby Process-Manager, if needed
                                            if ( !newStandbyModule.empty() && newStandbyModule != "NONE")
                                            {
                                                processManager.setStandbyModule(newStandbyModule);
                                            }
                                            else
                                            {
                                                Config* sysConfig = Config::makeConfig();

                                                // clear Standby OAM Module
                                                sysConfig->setConfig("SystemConfig", "StandbyOAMModuleName", oam::UnassignedName);
                                                sysConfig->setConfig("ProcStatusControlStandby", "IPAddr", oam::UnassignedIpAddr);

                                                //update Calpont Config table
                                                try
                                                {
                                                    sysConfig->write();
                                                }
                                                catch (...)
                                                {
                                                    log.writeLog(__LINE__, "ERROR: sysConfig->write", LOG_TYPE_ERROR);
                                                }
                                            }
                                        }

                                        // remove/addmodule
                                        log.writeLog(__LINE__, "Instance terminated, re-launching: " + hostName, LOG_TYPE_DEBUG);

                                        // if pm, get assigned dbroots and deattach EBS
                                        DBRootConfigList dbrootConfigList;
                                        int moduleID = atoi(moduleName.substr(MAX_MODULE_TYPE_SIZE, MAX_MODULE_ID_SIZE).c_str());

                                        if ( moduleName.find("pm") == 0 )
                                        {
                                            //get dbroots ids for to PM
                                            try
                                            {
                                                oam.getPmDbrootConfig(moduleID, dbrootConfigList);
                                            }
                                            catch (exception& e)
                                            {
                                                log.writeLog(__LINE__, "ERROR: getPmDbrootConfig error: " + moduleName, LOG_TYPE_DEBUG);
                                            }
                                        }

                                        DeviceNetworkList devicenetworklist;
                                        DeviceNetworkConfig devicenetworkconfig;
                                        HostConfig hostconfig;

                                        devicenetworkconfig.DeviceName = moduleName;

                                        if (cloud == "amazon-vpc")
                                            hostconfig.IPAddr = ipAddr;
                                        else
                                            hostconfig.IPAddr = oam::UnassignedName;

                                        hostconfig.HostName = oam::UnassignedName;
                                        hostconfig.NicID = 1;
                                        devicenetworkconfig.hostConfigList.push_back(hostconfig);

                                        devicenetworklist.push_back(devicenetworkconfig);

                                        bool pass = true;

                                        for ( int addRetry = 0 ; addRetry < 5 ; addRetry++ )
                                        {
                                            //remove module
                                            int ret = processManager.removeModule(devicenetworklist, false);

                                            if ( ret != oam::API_SUCCESS )
                                            {
                                                log.writeLog(__LINE__, "Instance failed to remove, retry: " + moduleName, LOG_TYPE_DEBUG);
                                            }
                                            else
                                            {
                                                pass = true;
                                                log.writeLog(__LINE__, "Instance removed, module: " + moduleName, LOG_TYPE_DEBUG);
                                            }

                                            // add module
					    ret = processManager.addModule(devicenetworklist, "ssh", false);

                                            if ( ret != oam::API_SUCCESS )
                                            {
                                                log.writeLog(__LINE__, "Instance failed to add, retry: " + moduleName, LOG_TYPE_CRITICAL);
                                                pass = false;
                                            }
                                            else
                                            {
                                                pass = true;
                                                log.writeLog(__LINE__, "New Instance Launched for " + moduleName, LOG_TYPE_DEBUG);

                                                // if pm, config and attach EBS
                                                if ( moduleName.find("pm") == 0 && !dbrootConfigList.empty() )
                                                {
                                                    try
                                                    {
                                                        oam.setPmDbrootConfig(moduleID, dbrootConfigList);

                                                        std::vector<std::string> dbrootList;
                                                        DBRootConfigList::iterator pt1 = dbrootConfigList.begin();

                                                        for ( ; pt1 != dbrootConfigList.end() ; pt1++)
                                                        {
                                                            dbrootList.push_back(oam.itoa(*pt1));
                                                        }

                                                        //attach EBS
                                                        try
                                                        {
                                                            oam.amazonReattach(moduleName, dbrootList, true);
                                                            pass = true;
                                                            break;
                                                        }
                                                        catch (exception& e)
                                                        {
                                                            log.writeLog(__LINE__, "ERROR: amazonReattach error on " + moduleName, LOG_TYPE_ERROR);
                                                            pass = false;
                                                        }
                                                    }
                                                    catch (exception& e)
                                                    {
                                                        log.writeLog(__LINE__, "ERROR: setPmDbrootConfig error on " + moduleName, LOG_TYPE_ERROR);
                                                        pass = false;
                                                    }
                                                }
                                                else
                                                {
                                                    pass = true;
                                                    break;
                                                }
                                            }

                                            if (pass)
                                                break;
                                        }

                                        if (pass)
                                            //Set the module state so it will be brought back up
                                            processManager.setModuleState(moduleName, oam::AUTO_DISABLED);
                                        else
                                        {
                                            //new instance failed to get added
                                            //remove and try auto moving dbroots to other pms
                                            processManager.removeModule(devicenetworklist, false);

                                            // if pm, move dbroots to other pms
                                            if ( ( moduleName.find("pm") == 0 && !amazon && ( DBRootStorageType != "internal") ) ||
                                                    ( moduleName.find("pm") == 0 && amazon && downActiveOAMModule ) ||
                                                    ( moduleName.find("pm") == 0 && amazon && AmazonPMFailover == "y") )
                                            {
                                                try
                                                {
                                                    log.writeLog(__LINE__, "Call autoMovePmDbroot", LOG_TYPE_DEBUG);
                                                    oam.autoMovePmDbroot(moduleName);
                                                    log.writeLog(__LINE__, "autoMovePmDbroot success", LOG_TYPE_DEBUG);
                                                    //distribute config file
                                                    processManager.distributeConfigFile("system");
                                                }
                                                catch (exception& ex)
                                                {
                                                    string error = ex.what();
                                                    log.writeLog(__LINE__, "EXCEPTION ERROR on autoMovePmDbroot: " + error, LOG_TYPE_DEBUG);
                                                }
                                                catch (...)
                                                {
                                                    log.writeLog(__LINE__, "EXCEPTION ERROR on autoMovePmDbroot: Caught unknown exception!", LOG_TYPE_ERROR);
                                                }
                                            }

                                            //set recycle process
                                            processManager.recycleProcess(moduleName);

                                            //set query system state ready
                                            processManager.setQuerySystemState(true);

                                            sleep(2);
                                            processManager.setSystemState(oam::ACTIVE);
                                        }
                                    }

                                    if ( ( moduleName.find("pm") == 0 ) &&
                                            ( opState != oam::AUTO_DISABLED ) )

                                    {
                                        // resume the dbrm
                                        oam.dbrmctl("resume");
                                        log.writeLog(__LINE__, "'dbrmctl resume' done", LOG_TYPE_DEBUG);

                                        //set query system state ready
                                        processManager.setQuerySystemState(true);
                                    }
                                }
                                else
                                {
                                    // non-amazon
                                    // resume the dbrm
                                    oam.dbrmctl("resume");
                                    log.writeLog(__LINE__, "'dbrmctl resume' done", LOG_TYPE_DEBUG);

                                    //set recycle process
                                    processManager.recycleProcess(moduleName);

                                    //set query system state ready
                                    processManager.setQuerySystemState(true);
                                }

                                //check if down module was Standby OAM, if so find another one
                                if ( moduleName == config.OAMStandbyName() )
                                {

                                    //set down module ProcessManager to AOS
                                    processManager.setProcessState(moduleName, "ProcessManager", oam::AUTO_OFFLINE, 0);

                                    //get another standby OAM module
                                    string newStandbyModule = processManager.getStandbyModule();

                                    //send message to start new Standby Process-Manager, if needed
                                    if ( !newStandbyModule.empty() && newStandbyModule != "NONE")
                                    {
                                        processManager.setStandbyModule(newStandbyModule);
                                    }
                                    else
                                    {
                                        Config* sysConfig = Config::makeConfig();

                                        // clear Standby OAM Module
                                        sysConfig->setConfig("SystemConfig", "StandbyOAMModuleName", oam::UnassignedName);
                                        sysConfig->setConfig("ProcStatusControlStandby", "IPAddr", oam::UnassignedIpAddr);

                                        //update Calpont Config table
                                        try
                                        {
                                            sysConfig->write();
                                        }
                                        catch (...)
                                        {
                                            log.writeLog(__LINE__, "ERROR: sysConfig->write", LOG_TYPE_ERROR);
                                        }
                                    }
                                }

                                // re-setup mysql rep slaves if master changed
                                if ( downPrimaryUM &&
                                        ( MySQLRep == "y" ) )
                                {
                                    //setup MySQL Replication for started modules
                                    log.writeLog(__LINE__, "Setup MySQL Replication for module outage on " + moduleName, LOG_TYPE_DEBUG);
                                    DeviceNetworkList devicenetworklist;
                                    processManager.setMySQLReplication(devicenetworklist);
                                }

                                // if disabled and amazon, break out
                                if ( (opState == oam::AUTO_DISABLED ) && amazon )
                                    break;

                                //start SIMPLEX runtype processes on a SIMPLEX runtype module
                                string moduletype = moduleName.substr(0, MAX_MODULE_TYPE_SIZE);

                                try
                                {
                                    oam.getSystemConfig(moduletype, moduletypeconfig);
                                }
                                catch (exception& ex)
                                {
                                    string error = ex.what();
                                    log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: " + error, LOG_TYPE_ERROR);
                                }
                                catch (...)
                                {
                                    log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: Caught unknown exception!", LOG_TYPE_ERROR);
                                }

                                if ( moduletypeconfig.RunType == SIMPLEX )
                                {
                                    DeviceNetworkList::iterator pt = moduletypeconfig.ModuleNetworkList.begin();

                                    for ( ; pt != moduletypeconfig.ModuleNetworkList.end() ; pt++)
                                    {
                                        string launchModuleName = (*pt).DeviceName;
                                        string launchModuletype = launchModuleName.substr(0, MAX_MODULE_TYPE_SIZE);

                                        if ( moduletype != launchModuletype )
                                            continue;

                                        //skip if active pm module (local module)
                                        if ( launchModuleName == config.moduleName() )
                                            continue;

                                        if ( moduleName != launchModuleName )
                                        {
                                            //check if module is active before starting any SIMPLEX STANDBY apps
                                            try
                                            {
                                                int launchopState = oam::ACTIVE;
                                                bool degraded;
                                                oam.getModuleStatus(launchModuleName, launchopState, degraded);

                                                if (launchopState != oam::ACTIVE && launchopState != oam::STANDBY )
                                                {
                                                    continue;
                                                }
                                            }
                                            catch (exception& ex)
                                            {
//												string error = ex.what();
//												log.writeLog(__LINE__, "EXCEPTION ERROR on : " + error, LOG_TYPE_ERROR);
                                            }
                                            catch (...)
                                            {
//												log.writeLog(__LINE__, "EXCEPTION ERROR on getModuleStatus on module " + moduleName + ": Caught unknown exception!", LOG_TYPE_ERROR);
                                            }

                                            int status;
                                            log.writeLog(__LINE__, "Starting up STANDBY process on module " + launchModuleName, LOG_TYPE_DEBUG);

                                            for ( int j = 0 ; j < 20 ; j ++ )
                                            {
                                                status = processManager.startModule(launchModuleName, oam::FORCEFUL, oam::AUTO_OFFLINE);

                                                if ( status == API_SUCCESS)
                                                    break;
                                            }

                                            log.writeLog(__LINE__, "pingDeviceThread: ACK received from '" + launchModuleName + "' Process-Monitor, return status = " + oam.itoa(status), LOG_TYPE_DEBUG);
                                        }
                                    }
                                }
                            }

                            break;
                    }
                }
            } //end of for loop
        }
		break_case:

        // check and take action if LAN outage is flagged
        if (LANOUTAGESUPPORT && !LANOUTAGEACTIVE && LOCALNICDOWN)
        {
            log.writeLog(__LINE__, "LAN Failure detected", LOG_TYPE_CRITICAL);

            oam.sendDeviceNotification(config.moduleName(), START_PM_MASTER_DOWN);

            LANOUTAGEACTIVE = true;

            log.writeLog(__LINE__, "Kill any cpimport running", LOG_TYPE_INFO);
            system("pkill -9 cpimport");

            //request stop of local module
            int status = processManager.stopModule(config.moduleName(), oam::FORCEFUL, false);

            if ( status != oam::API_SUCCESS )
                log.writeLog(__LINE__, "stopmodule failed", LOG_TYPE_ERROR);

            //stop snmptrap daemon process
            processManager.stopProcess(config.moduleName(), "SNMPTrapDaemon", oam::FORCEFUL, false);
        }
        else
        {
            if ( LANOUTAGEACTIVE && HOTSTANDBYACTIVE && !LOCALNICDOWN)
            {
//				pthread_mutex_unlock(&THREAD_LOCK);
                LANOUTAGEACTIVE = false;

                log.writeLog(__LINE__, "LAN Failure recovery");

                //check if this module is still active according to the last known hot standby module
                ByteStream msg;
                ByteStream::byte requestID = GETPARENTOAMMODULE;
                msg << requestID;

                string parentOAMModule = processManager.sendMsgProcMon1( config.OAMStandbyName(), msg, requestID );

                if ( parentOAMModule == config.moduleName() ||
                        parentOAMModule == "FAILED" )
                {

                    //restart/reinit these processes in case they marked any PrimProcs offline
                    processManager.restartProcessType("ExeMgr");
                    processManager.reinitProcessType("DDLProc");
                    processManager.reinitProcessType("DMLProc");
                }
                else
                {
                    //send message to local Process Monitor to run coldStandby
                    ByteStream msg;
                    ByteStream::byte requestID = OAMPARENTCOLD;

                    msg << requestID;

                    int returnStatus = processManager.sendMsgProcMon( config.moduleName(), msg, requestID );
                    log.writeLog(__LINE__, "sent OAM Parent Cold message to local Process-Monitor, status: " + oam.itoa(returnStatus), LOG_TYPE_DEBUG);

                    //request stop of local module
                    int status = processManager.stopModule(config.moduleName(), oam::INSTALL, false);

                    if ( status != oam::API_SUCCESS )
                        log.writeLog(__LINE__, "stopmodule failed", LOG_TYPE_ERROR);
                }
            }
        }

        //
        // ping ext devices
        //

        // read each time to catch updates
        systemextdeviceconfig.extdeviceconfig.clear();

        try
        {
            oam.getSystemConfig(systemextdeviceconfig);
        }
        catch (exception& ex)
        {
            string error = ex.what();
//			log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: " + error, LOG_TYPE_ERROR);
        }
        catch (...)
        {
//			log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: Caught unknown exception!", LOG_TYPE_ERROR);
        }

        for ( unsigned int i = 0 ; i < systemextdeviceconfig.Count ; i++ )
        {
            string extDeviceName = systemextdeviceconfig.extdeviceconfig[i].Name;
            string ipAddr = systemextdeviceconfig.extdeviceconfig[i].IPAddr;

            int opState = oam::ACTIVE;

            try
            {
                oam.getExtDeviceStatus(extDeviceName, opState);
            }
            catch (exception& ex)
            {
//				string error = ex.what();
//				log.writeLog(__LINE__, "EXCEPTION ERROR on getExtDeviceStatus: " + error, LOG_TYPE_ERROR);
            }
            catch (...)
            {
//				log.writeLog(__LINE__, "EXCEPTION ERROR on getExtDeviceStatus: Caught unknown exception!", LOG_TYPE_ERROR);
            }

            cmd = cmdLine + ipAddr + cmdOption;
            rtnCode = system(cmd.c_str());

            switch (WEXITSTATUS(rtnCode))
            {
                case 0:

                    //Switch Ack ping, Check whether alarm have been issued
                    if (extDeviceInfoList[extDeviceName] >= ModuleHeartbeatCount)
                    {
                        aManager.sendAlarmReport(extDeviceName.c_str(), EXT_DEVICE_DOWN_AUTO, CLEAR);

                    }

                    extDeviceInfoList[extDeviceName] = 0;

                    if (opState != oam::ACTIVE)
                    {
                        //Set the switch state to active
                        processManager.setExtdeviceState(extDeviceName, oam::ACTIVE);
                    }

                    break;

                default:
                    //extDevice failed to respond to ping
                    log.writeLog(__LINE__, "extDevice failed to respond to ping: " + extDeviceName, LOG_TYPE_WARNING);
                    extDeviceInfoList[extDeviceName]++;

                    if (extDeviceInfoList[extDeviceName] == ModuleHeartbeatCount)
                    {
                        //Log failure, issue alarm, set extDeviceOpState
                        log.writeLog(__LINE__, "extDevice is down: " + extDeviceName, LOG_TYPE_CRITICAL);

                        processManager.setExtdeviceState(extDeviceName, oam::AUTO_OFFLINE);

                        //Issue an alarm
                        aManager.sendAlarmReport(extDeviceName.c_str(), EXT_DEVICE_DOWN_AUTO, SET);
                    }

                    break;
            }
        } //end of for loop

        // double check to make sure the system status is ACTIVE if all module status's are ACTIVE
        try
        {
            if (dbrm.isDBRMReady())
            {
                int systemReady = dbrm.getSystemReady();    // -1 == fail, 0 == not ready, 1 == ready

                if (systemReady > 0)
                {
                    bool updateActive = true;

                    for ( unsigned int i = 0 ; i < systemModuleTypeConfig.moduletypeconfig.size(); i++)
                    {
                        int moduleCount = systemModuleTypeConfig.moduletypeconfig[i].ModuleCount;

                        if ( moduleCount == 0)
                            continue;

                        DeviceNetworkList::iterator pt = systemModuleTypeConfig.moduletypeconfig[i].ModuleNetworkList.begin();

                        for ( ; pt != systemModuleTypeConfig.moduletypeconfig[i].ModuleNetworkList.end() ; pt++)
                        {
                            string moduleName = (*pt).DeviceName;

                            int opState = oam::ACTIVE;

                            try
                            {
                                bool degraded;
                                oam.getModuleStatus(moduleName, opState, degraded);

                                if (opState == oam::ACTIVE ||
                                        opState == oam::DEGRADED ||
                                        opState == oam::MAN_DISABLED ||
                                        opState == oam::AUTO_DISABLED )
                                    continue;

                                updateActive = false;
                            }
                            catch (exception& ex)
                            {
                                //                            string error = ex.what();
                                //                          log.writeLog(__LINE__, "EXCEPTION ERROR on : " + error, LOG_TYPE_ERROR);
                            }
                            catch (...)
                            {
                                //                            log.writeLog(__LINE__, "EXCEPTION ERROR on getModuleStatus on module " + moduleName + ": Caught unknown exception!", LOG_TYPE_ERROR);
                            }
                        }
                    }

                    if (updateActive)
                    {
//						log.writeLog(__LINE__, "Modules are ACTIVE, check system state ", LOG_TYPE_DEBUG);

                        string PrimaryUMModuleName;

                        try
                        {
                            oam.getSystemConfig("PrimaryUMModuleName", PrimaryUMModuleName);
                        }
                        catch (...) {}

//						log.writeLog(__LINE__, "PrimaryUMModuleName = " + PrimaryUMModuleName, LOG_TYPE_DEBUG);

                        ProcessStatus DMLprocessstatus;

                        try
                        {
                            oam.getProcessStatus("DMLProc", PrimaryUMModuleName, DMLprocessstatus);
                        }
                        catch (exception& ex)
                        {
                            //						string error = ex.what();
                            //						log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: " + error, LOG_TYPE_ERROR);
                        }
                        catch (...)
                        {
                            //						log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: Caught unknown exception!", LOG_TYPE_ERROR);
                        }

//						log.writeLog(__LINE__, "DMLPROC STATUS = " + oamState[DMLprocessstatus.ProcessOpState], LOG_TYPE_DEBUG);

                        if (DMLprocessstatus.ProcessOpState == oam::ACTIVE)
                        {

                            //set the system status if a change has occurred
                            SystemStatus systemstatus;

                            try
                            {
                                oam.getSystemStatus(systemstatus);
                            }
                            catch (exception& ex)
                            {
                                //							string error = ex.what();
                                //							log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemStatus: " + error, LOG_TYPE_ERROR);
                            }
                            catch (...)
                            {
                                //							log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemStatus: Caught unknown exception!", LOG_TYPE_ERROR);
                            }

                            if ( systemstatus.SystemOpState != oam::ACTIVE )
                            {
                                processManager.setSystemState(oam::ACTIVE);
                            }
                        }

                        if (DMLprocessstatus.ProcessOpState == oam::BUSY_INIT)
                        {

                            //set the system status if a change has occurred
                            SystemStatus systemstatus;

                            try
                            {
                                oam.getSystemStatus(systemstatus);
                            }
                            catch (exception& ex)
                            {
                                //							string error = ex.what();
                                //							log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemStatus: " + error, LOG_TYPE_ERROR);
                            }
                            catch (...)
                            {
                                //							log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemStatus: Caught unknown exception!", LOG_TYPE_ERROR);
                            }

                            if ( systemstatus.SystemOpState != oam::BUSY_INIT )
                            {
                                processManager.setSystemState(oam::BUSY_INIT);
                            }
                        }
                    }
                }
            }
        }
        catch (...)
        {
        }

        //go sleep for a bit
        int sleepTime = ModuleHeartbeatPeriod / 10;

        if (!enableModuleMonitor && systemextdeviceconfig.Count == 0)
            sleep(60);
        else
            sleep(sleepTime);
    }

    return;
}

/******************************************************************************************
* @brief      hdfsActiveAlarmsPushingThread
*
* purpose:    Push an image of ActiveAlarms to HDFS for non-OAMParentModule to view.
*
******************************************************************************************/
static void hdfsActiveAlarmsPushingThread()
{
    boost::filesystem::path filePath(ACTIVE_ALARM_FILE);
    boost::filesystem::path dirPath = filePath.parent_path();
    string dirName = boost::filesystem::canonical(dirPath).string();

    if (boost::filesystem::exists("/etc/pdsh/machines"))
    {
        string cpCmd =  "pdcp -a -x " + localHostName + " " + ACTIVE_ALARM_FILE + " " + dirName +
                        " > /dev/null 2>&1";
        string rmCmd =  "pdsh -a -x " + localHostName + " rm -f " + ACTIVE_ALARM_FILE +
                        " > /dev/null 2>&1";

        while (1)
        {
            if (boost::filesystem::exists(filePath))
                system(cpCmd.c_str());
            else
                system(rmCmd.c_str());

            sleep(ACTIVE_ALARMS_PUSHING_INTERVAL);
        }
    }

    return;
}


/*****************************************************************************************
* @brief	Processor Heartbeat Msg Thread
*
* purpose:	Read Heartbeat Messages from other Processes
*
*****************************************************************************************/
/*
static void heartbeatMsgThread()
{
	ProcessLog log;
	Configuration config;
	ProcessManager processManager(config, log);

	//
	//waiting for request
	//
	ByteStream receivedMSG;
	IOSocket fIos;

	for (;;)
	{
		try
		{
			MessageQueueServer procmgr("ProcHeartbeatControl");
			for (;;)
			{
				try
				{
					fIos = procmgr.accept();
					receivedMSG = fIos.read();

					if (receivedMSG.length() > 0) {
						processManager.processMSG(fIos, receivedMSG);
					}
				}
				catch (exception& ex)
				{
					string error = ex.what();
					log.writeLog(__LINE__, "EXCEPTION ERROR on ProcHeartbeatControl.accept: " + error, LOG_TYPE_ERROR);
				}
				catch(...)
				{
					log.writeLog(__LINE__, "EXCEPTION ERROR on ProcHeartbeatControl.accept: Caught unknown exception!", LOG_TYPE_ERROR);
				}

				fIos.close();
			}
		}
        catch (exception& ex)
        {
			string error = ex.what();
			log.writeLog(__LINE__, "EXCEPTION ERROR on MessageQueueServer for ProcMgr:" + error, LOG_TYPE_ERROR);
			// takes 2 - 4 minutes to free sockets, sleep and retry
			sleep(60);
        }
        catch(...)
        {
			log.writeLog(__LINE__, "EXCEPTION ERROR on MessageQueueServer for ProcHeartbeatControl: Caught unknown exception!", LOG_TYPE_ERROR);
			// takes 2 - 4 minutes to free sockets, sleep and retry
			sleep(60);
        }
	}

}
*/

/*****************************************************************************************
* @brief	Processor Heartbeat Thread
*
* purpose:	Check Heartbeat Messages from other Processes
*
*****************************************************************************************/
/*
static void heartbeatProcessThread()
{
	ProcessLog log;
	Configuration config;
	ProcessManager processManager(config, log);
	Oam oam;
	ALARMManager aManager;

	int processHeartbeatPeriod=60;	//default value to 60 seconds

	log.writeLog(__LINE__, "Thread Launched: Process Heartbeat!!!");

	while (true)
	{
		//
		// check and report on register process not sending heartbeats
		//

		// get process heartbeat period
		try {
			oam.getSystemConfig("ProcessHeartbeatPeriod", processHeartbeatPeriod);
			processHeartbeatPeriod = processHeartbeatPeriod * 60;
		}
		catch (exception& ex)
		{
			string error = ex.what();
			log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: " + error, LOG_TYPE_ERROR);
		}
		catch(...)
		{
			log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: Caught unknown exception!", LOG_TYPE_ERROR);
		}

		Oam oam;
		log.writeLog(__LINE__, "Process Heartbeat check started, Heartbeat period is " + oam.itoa(processHeartbeatPeriod), LOG_TYPE_DEBUG);

		sleep(processHeartbeatPeriod);

		HeartBeatProcList::iterator list = hbproclist.begin();
		for( ; list != hbproclist.end() ; list++)
		{
			string moduleName = (*list).ModuleName;
			string processName = (*list).ProcessName;
			int id = (*list).ID;

			// get Process state and only check if ACTIVE
			ProcessStatus procstat;
			try{
				oam.getProcessStatus(processName, moduleName, procstat);
			}
			catch (exception& ex)
			{
				string error = ex.what();
				log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: " + error, LOG_TYPE_ERROR);
				procstat.ProcessOpState = oam::MAN_OFFLINE;
			}
			catch(...)
			{
				log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: Caught unknown exception!", LOG_TYPE_ERROR);
				procstat.ProcessOpState = oam::MAN_OFFLINE;
			}

			if ( procstat.ProcessOpState == oam::ACTIVE ) {
				// skip testing if Heartbeat is disable
				if( processHeartbeatPeriod != -1 ) {
//log.writeLog(__LINE__, "Heartbeat: Process being monitored: " + moduleName + " / " + processName + " / " + oam.itoa(id), LOG_TYPE_DEBUG);
					if ( !(*list).receiveFlag ) {
						// got a missing heartbeat, request a restart on the process
						log.writeLog(__LINE__, "heartbeatProcessThread: Failure from process " + moduleName + " / " + processName+ " / " + oam.itoa(id), LOG_TYPE_WARNING);

						oam.restartProcess(moduleName, processName, FORCEFUL, ACK_NO);
						(*list).receiveFlag = true;
						// reset all other entries for this process
						HeartBeatProcList::iterator list1 = hbproclist.begin();
						for( ; list1 != hbproclist.end() ; list1++)
						{
							string moduleName1 = (*list1).ModuleName;
							string processName1 = (*list1).ProcessName;
							if ( moduleName == moduleName1 && processName == processName1 )
								(*list1).receiveFlag = true;
						}
					}
					else
						// reset receive heartbeat indication flag
						(*list).receiveFlag = false;
				}
				else
					// heartbeat is disabled
					(*list).receiveFlag=true;
			}
			else
			{	// registered process not active, remove from list
				hbproclist.erase(list);
				log.writeLog(__LINE__, "Removing OOS Process from Heartbeat Monitor list: " + moduleName + " / " + processName+ " / " + oam.itoa(id));
				break;
			}
		}
	} // end of while forever loop
}
*/
// vim:ts=4 sw=4:
